// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/disk_cache/simple/simple_index_file.h"

#include <utility>
#include <vector>

#include "base/files/file.h"
#include "base/files/file_util.h"
#include "base/files/memory_mapped_file.h"
#include "base/hash.h"
#include "base/logging.h"
#include "base/numerics/safe_conversions.h"
#include "base/pickle.h"
#include "base/single_thread_task_runner.h"
#include "base/task_runner_util.h"
#include "base/threading/thread_restrictions.h"
#include "net/disk_cache/simple/simple_backend_version.h"
#include "net/disk_cache/simple/simple_entry_format.h"
#include "net/disk_cache/simple/simple_histogram_macros.h"
#include "net/disk_cache/simple/simple_index.h"
#include "net/disk_cache/simple/simple_synchronous_entry.h"
#include "net/disk_cache/simple/simple_util.h"
#include "third_party/zlib/zlib.h"

using base::File;

namespace disk_cache {
namespace {

    const int kEntryFilesHashLength = 16;
    const int kEntryFilesSuffixLength = 2;

    const uint64_t kMaxEntriesInIndex = 100000000;

    uint32_t CalculatePickleCRC(const base::Pickle& pickle)
    {
        return crc32(crc32(0, Z_NULL, 0),
            reinterpret_cast<const Bytef*>(pickle.payload()),
            pickle.payload_size());
    }

    // Used in histograms. Please only add new values at the end.
    enum IndexFileState {
        INDEX_STATE_CORRUPT = 0,
        INDEX_STATE_STALE = 1,
        INDEX_STATE_FRESH = 2,
        INDEX_STATE_FRESH_CONCURRENT_UPDATES = 3,
        INDEX_STATE_MAX = 4,
    };

    enum StaleIndexQuality {
        STALE_INDEX_OK = 0,
        STALE_INDEX_MISSED_ENTRIES = 1,
        STALE_INDEX_EXTRA_ENTRIES = 2,
        STALE_INDEX_BOTH_MISSED_AND_EXTRA_ENTRIES = 3,
        STALE_INDEX_MAX = 4,
    };

    void UmaRecordIndexFileState(IndexFileState state, net::CacheType cache_type)
    {
        SIMPLE_CACHE_UMA(ENUMERATION,
            "IndexFileStateOnLoad", cache_type, state, INDEX_STATE_MAX);
    }

    void UmaRecordIndexInitMethod(SimpleIndex::IndexInitMethod method,
        net::CacheType cache_type)
    {
        SIMPLE_CACHE_UMA(ENUMERATION, "IndexInitializeMethod", cache_type, method,
            SimpleIndex::INITIALIZE_METHOD_MAX);
    }

    void UmaRecordIndexWriteReason(SimpleIndex::IndexWriteToDiskReason reason,
        net::CacheType cache_type)
    {
        SIMPLE_CACHE_UMA(ENUMERATION, "IndexWriteReason", cache_type, reason,
            SimpleIndex::INDEX_WRITE_REASON_MAX);
    }

    void UmaRecordIndexWriteReasonAtLoad(SimpleIndex::IndexWriteToDiskReason reason,
        net::CacheType cache_type)
    {
        SIMPLE_CACHE_UMA(ENUMERATION, "IndexWriteReasonAtLoad", cache_type, reason,
            SimpleIndex::INDEX_WRITE_REASON_MAX);
    }

    void UmaRecordStaleIndexQuality(int missed_entry_count,
        int extra_entry_count,
        net::CacheType cache_type)
    {
        SIMPLE_CACHE_UMA(CUSTOM_COUNTS, "StaleIndexMissedEntryCount", cache_type,
            missed_entry_count, 1, 100, 5);
        SIMPLE_CACHE_UMA(CUSTOM_COUNTS, "StaleIndexExtraEntryCount", cache_type,
            extra_entry_count, 1, 100, 5);

        StaleIndexQuality quality;
        if (missed_entry_count > 0 && extra_entry_count > 0)
            quality = STALE_INDEX_BOTH_MISSED_AND_EXTRA_ENTRIES;
        else if (missed_entry_count > 0)
            quality = STALE_INDEX_MISSED_ENTRIES;
        else if (extra_entry_count > 0)
            quality = STALE_INDEX_EXTRA_ENTRIES;
        else
            quality = STALE_INDEX_OK;
        SIMPLE_CACHE_UMA(ENUMERATION, "StaleIndexQuality", cache_type, quality,
            STALE_INDEX_MAX);
    }

    bool WritePickleFile(base::Pickle* pickle, const base::FilePath& file_name)
    {
        File file(
            file_name,
            File::FLAG_CREATE_ALWAYS | File::FLAG_WRITE | File::FLAG_SHARE_DELETE);
        if (!file.IsValid())
            return false;

        int bytes_written = file.Write(0, static_cast<const char*>(pickle->data()), pickle->size());
        if (bytes_written != base::checked_cast<int>(pickle->size())) {
            simple_util::SimpleCacheDeleteFile(file_name);
            return false;
        }
        return true;
    }

    // Called for each cache directory traversal iteration.
    void ProcessEntryFile(SimpleIndex::EntrySet* entries,
        const base::FilePath& file_path)
    {
        static const size_t kEntryFilesLength = kEntryFilesHashLength + kEntryFilesSuffixLength;
        // Converting to std::string is OK since we never use UTF8 wide chars in our
        // file names.
        const base::FilePath::StringType base_name = file_path.BaseName().value();
        const std::string file_name(base_name.begin(), base_name.end());
        if (file_name.size() != kEntryFilesLength)
            return;
        const base::StringPiece hash_string(
            file_name.begin(), file_name.begin() + kEntryFilesHashLength);
        uint64_t hash_key = 0;
        if (!simple_util::GetEntryHashKeyFromHexString(hash_string, &hash_key)) {
            LOG(WARNING) << "Invalid entry hash key filename while restoring index from"
                         << " disk: " << file_name;
            return;
        }

        File::Info file_info;
        if (!base::GetFileInfo(file_path, &file_info)) {
            LOG(ERROR) << "Could not get file info for " << file_path.value();
            return;
        }
        base::Time last_used_time;
#if defined(OS_POSIX)
        // For POSIX systems, a last access time is available. However, it's not
        // guaranteed to be more accurate than mtime. It is no worse though.
        last_used_time = file_info.last_accessed;
#endif
        if (last_used_time.is_null())
            last_used_time = file_info.last_modified;

        int64_t file_size = file_info.size;
        SimpleIndex::EntrySet::iterator it = entries->find(hash_key);
        if (it == entries->end()) {
            SimpleIndex::InsertInEntrySet(
                hash_key,
                EntryMetadata(last_used_time, file_size),
                entries);
        } else {
            // Summing up the total size of the entry through all the *_[0-1] files
            it->second.SetEntrySize(it->second.GetEntrySize() + file_size);
        }
    }

} // namespace

SimpleIndexLoadResult::SimpleIndexLoadResult()
    : did_load(false)
    , index_write_reason(SimpleIndex::INDEX_WRITE_REASON_MAX)
    , flush_required(false)
{
}

SimpleIndexLoadResult::~SimpleIndexLoadResult()
{
}

void SimpleIndexLoadResult::Reset()
{
    did_load = false;
    index_write_reason = SimpleIndex::INDEX_WRITE_REASON_MAX;
    flush_required = false;
    entries.clear();
}

// static
const char SimpleIndexFile::kIndexFileName[] = "the-real-index";
// static
const char SimpleIndexFile::kIndexDirectory[] = "index-dir";
// static
const char SimpleIndexFile::kTempIndexFileName[] = "temp-index";

SimpleIndexFile::IndexMetadata::IndexMetadata()
    : magic_number_(kSimpleIndexMagicNumber)
    , version_(kSimpleVersion)
    , reason_(SimpleIndex::INDEX_WRITE_REASON_MAX)
    , entry_count_(0)
    , cache_size_(0)
{
}

SimpleIndexFile::IndexMetadata::IndexMetadata(
    SimpleIndex::IndexWriteToDiskReason reason,
    uint64_t entry_count,
    uint64_t cache_size)
    : magic_number_(kSimpleIndexMagicNumber)
    , version_(kSimpleVersion)
    , reason_(reason)
    , entry_count_(entry_count)
    , cache_size_(cache_size)
{
}

void SimpleIndexFile::IndexMetadata::Serialize(base::Pickle* pickle) const
{
    DCHECK(pickle);
    pickle->WriteUInt64(magic_number_);
    pickle->WriteUInt32(version_);
    pickle->WriteUInt64(entry_count_);
    pickle->WriteUInt64(cache_size_);
    pickle->WriteUInt32(static_cast<uint32_t>(reason_));
}

// static
bool SimpleIndexFile::SerializeFinalData(base::Time cache_modified,
    base::Pickle* pickle)
{
    if (!pickle->WriteInt64(cache_modified.ToInternalValue()))
        return false;
    SimpleIndexFile::PickleHeader* header_p = pickle->headerT<PickleHeader>();
    header_p->crc = CalculatePickleCRC(*pickle);
    return true;
}

bool SimpleIndexFile::IndexMetadata::Deserialize(base::PickleIterator* it)
{
    DCHECK(it);

    bool v6_format_index_read_results = it->ReadUInt64(&magic_number_) && it->ReadUInt32(&version_) && it->ReadUInt64(&entry_count_) && it->ReadUInt64(&cache_size_);
    if (!v6_format_index_read_results)
        return false;
    if (version_ >= 7) {
        uint32_t tmp_reason;
        if (!it->ReadUInt32(&tmp_reason))
            return false;
        reason_ = static_cast<SimpleIndex::IndexWriteToDiskReason>(tmp_reason);
    }
    return true;
}

void SimpleIndexFile::SyncWriteToDisk(net::CacheType cache_type,
    const base::FilePath& cache_directory,
    const base::FilePath& index_filename,
    const base::FilePath& temp_index_filename,
    std::unique_ptr<base::Pickle> pickle,
    const base::TimeTicks& start_time,
    bool app_on_background)
{
    DCHECK_EQ(index_filename.DirName().value(),
        temp_index_filename.DirName().value());
    base::FilePath index_file_directory = temp_index_filename.DirName();
    if (!base::DirectoryExists(index_file_directory) && !base::CreateDirectory(index_file_directory)) {
        LOG(ERROR) << "Could not create a directory to hold the index file";
        return;
    }

    // There is a chance that the index containing all the necessary data about
    // newly created entries will appear to be stale. This can happen if on-disk
    // part of a Create operation does not fit into the time budget for the index
    // flush delay. This simple approach will be reconsidered if it does not allow
    // for maintaining freshness.
    base::Time cache_dir_mtime;
    if (!simple_util::GetMTime(cache_directory, &cache_dir_mtime)) {
        LOG(ERROR) << "Could obtain information about cache age";
        return;
    }
    SerializeFinalData(cache_dir_mtime, pickle.get());
    if (!WritePickleFile(pickle.get(), temp_index_filename)) {
        LOG(ERROR) << "Failed to write the temporary index file";
        return;
    }

    // Atomically rename the temporary index file to become the real one.
    // TODO(gavinp): DCHECK when not shutting down, since that is very strange.
    // The rename failing during shutdown is legal because it's legal to begin
    // erasing a cache as soon as the destructor has been called.
    if (!base::ReplaceFile(temp_index_filename, index_filename, NULL))
        return;

    if (app_on_background) {
        SIMPLE_CACHE_UMA(TIMES,
            "IndexWriteToDiskTime.Background", cache_type,
            (base::TimeTicks::Now() - start_time));
    } else {
        SIMPLE_CACHE_UMA(TIMES,
            "IndexWriteToDiskTime.Foreground", cache_type,
            (base::TimeTicks::Now() - start_time));
    }
}

bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata()
{
    if (entry_count_ > kMaxEntriesInIndex || magic_number_ != kSimpleIndexMagicNumber) {
        return false;
    }

    static_assert(kSimpleVersion == 7, "index metadata reader out of date");
    // No |reason_| is saved in the version 6 file format.
    if (version_ == 6)
        return reason_ == SimpleIndex::INDEX_WRITE_REASON_MAX;
    return version_ == 7 && reason_ < SimpleIndex::INDEX_WRITE_REASON_MAX;
}

SimpleIndexFile::SimpleIndexFile(
    const scoped_refptr<base::SingleThreadTaskRunner>& cache_thread,
    const scoped_refptr<base::TaskRunner>& worker_pool,
    net::CacheType cache_type,
    const base::FilePath& cache_directory)
    : cache_thread_(cache_thread)
    , worker_pool_(worker_pool)
    , cache_type_(cache_type)
    , cache_directory_(cache_directory)
    , index_file_(cache_directory_.AppendASCII(kIndexDirectory)
                      .AppendASCII(kIndexFileName))
    , temp_index_file_(cache_directory_.AppendASCII(kIndexDirectory)
                           .AppendASCII(kTempIndexFileName))
{
}

SimpleIndexFile::~SimpleIndexFile() { }

void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified,
    const base::Closure& callback,
    SimpleIndexLoadResult* out_result)
{
    base::Closure task = base::Bind(&SimpleIndexFile::SyncLoadIndexEntries,
        cache_type_,
        cache_last_modified, cache_directory_,
        index_file_, out_result);
    worker_pool_->PostTaskAndReply(FROM_HERE, task, callback);
}

void SimpleIndexFile::WriteToDisk(SimpleIndex::IndexWriteToDiskReason reason,
    const SimpleIndex::EntrySet& entry_set,
    uint64_t cache_size,
    const base::TimeTicks& start,
    bool app_on_background,
    const base::Closure& callback)
{
    UmaRecordIndexWriteReason(reason, cache_type_);
    IndexMetadata index_metadata(reason, entry_set.size(), cache_size);
    std::unique_ptr<base::Pickle> pickle = Serialize(index_metadata, entry_set);
    base::Closure task = base::Bind(&SimpleIndexFile::SyncWriteToDisk,
        cache_type_, cache_directory_, index_file_, temp_index_file_,
        base::Passed(&pickle), start, app_on_background);
    if (callback.is_null())
        cache_thread_->PostTask(FROM_HERE, task);
    else
        cache_thread_->PostTaskAndReply(FROM_HERE, task, callback);
}

// static
void SimpleIndexFile::SyncLoadIndexEntries(
    net::CacheType cache_type,
    base::Time cache_last_modified,
    const base::FilePath& cache_directory,
    const base::FilePath& index_file_path,
    SimpleIndexLoadResult* out_result)
{
    // Load the index and find its age.
    base::Time last_cache_seen_by_index;
    SyncLoadFromDisk(index_file_path, &last_cache_seen_by_index, out_result);

    // Consider the index loaded if it is fresh.
    const bool index_file_existed = base::PathExists(index_file_path);
    if (!out_result->did_load) {
        if (index_file_existed)
            UmaRecordIndexFileState(INDEX_STATE_CORRUPT, cache_type);
    } else {
        if (cache_last_modified <= last_cache_seen_by_index) {
            if (out_result->index_write_reason != SimpleIndex::INDEX_WRITE_REASON_MAX) {
                UmaRecordIndexWriteReasonAtLoad(out_result->index_write_reason,
                    cache_type);
            }
            base::Time latest_dir_mtime;
            simple_util::GetMTime(cache_directory, &latest_dir_mtime);
            if (LegacyIsIndexFileStale(latest_dir_mtime, index_file_path)) {
                UmaRecordIndexFileState(INDEX_STATE_FRESH_CONCURRENT_UPDATES,
                    cache_type);
            } else {
                UmaRecordIndexFileState(INDEX_STATE_FRESH, cache_type);
            }
            out_result->init_method = SimpleIndex::INITIALIZE_METHOD_LOADED;
            UmaRecordIndexInitMethod(out_result->init_method, cache_type);
            return;
        }
        UmaRecordIndexFileState(INDEX_STATE_STALE, cache_type);
    }

    // Reconstruct the index by scanning the disk for entries.
    SimpleIndex::EntrySet entries_from_stale_index;
    entries_from_stale_index.swap(out_result->entries);
    const base::TimeTicks start = base::TimeTicks::Now();
    SyncRestoreFromDisk(cache_directory, index_file_path, out_result);
    SIMPLE_CACHE_UMA(MEDIUM_TIMES, "IndexRestoreTime", cache_type,
        base::TimeTicks::Now() - start);
    SIMPLE_CACHE_UMA(COUNTS, "IndexEntriesRestored", cache_type,
        out_result->entries.size());
    if (index_file_existed) {
        out_result->init_method = SimpleIndex::INITIALIZE_METHOD_RECOVERED;

        int missed_entry_count = 0;
        for (const auto& i : out_result->entries) {
            if (entries_from_stale_index.count(i.first) == 0)
                ++missed_entry_count;
        }
        int extra_entry_count = 0;
        for (const auto& i : entries_from_stale_index) {
            if (out_result->entries.count(i.first) == 0)
                ++extra_entry_count;
        }
        UmaRecordStaleIndexQuality(missed_entry_count, extra_entry_count,
            cache_type);
    } else {
        out_result->init_method = SimpleIndex::INITIALIZE_METHOD_NEWCACHE;
        SIMPLE_CACHE_UMA(COUNTS,
            "IndexCreatedEntryCount", cache_type,
            out_result->entries.size());
    }
    UmaRecordIndexInitMethod(out_result->init_method, cache_type);
}

// static
void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath& index_filename,
    base::Time* out_last_cache_seen_by_index,
    SimpleIndexLoadResult* out_result)
{
    out_result->Reset();

    File file(index_filename,
        File::FLAG_OPEN | File::FLAG_READ | File::FLAG_SHARE_DELETE);
    if (!file.IsValid())
        return;

    base::MemoryMappedFile index_file_map;
    if (!index_file_map.Initialize(std::move(file))) {
        simple_util::SimpleCacheDeleteFile(index_filename);
        return;
    }

    SimpleIndexFile::Deserialize(
        reinterpret_cast<const char*>(index_file_map.data()),
        index_file_map.length(),
        out_last_cache_seen_by_index,
        out_result);

    if (!out_result->did_load)
        simple_util::SimpleCacheDeleteFile(index_filename);
}

// static
std::unique_ptr<base::Pickle> SimpleIndexFile::Serialize(
    const SimpleIndexFile::IndexMetadata& index_metadata,
    const SimpleIndex::EntrySet& entries)
{
    std::unique_ptr<base::Pickle> pickle(
        new base::Pickle(sizeof(SimpleIndexFile::PickleHeader)));

    index_metadata.Serialize(pickle.get());
    for (SimpleIndex::EntrySet::const_iterator it = entries.begin();
         it != entries.end(); ++it) {
        pickle->WriteUInt64(it->first);
        it->second.Serialize(pickle.get());
    }
    return pickle;
}

// static
void SimpleIndexFile::Deserialize(const char* data, int data_len,
    base::Time* out_cache_last_modified,
    SimpleIndexLoadResult* out_result)
{
    DCHECK(data);

    out_result->Reset();
    SimpleIndex::EntrySet* entries = &out_result->entries;

    base::Pickle pickle(data, data_len);
    if (!pickle.data()) {
        LOG(WARNING) << "Corrupt Simple Index File.";
        return;
    }

    base::PickleIterator pickle_it(pickle);
    SimpleIndexFile::PickleHeader* header_p = pickle.headerT<SimpleIndexFile::PickleHeader>();
    const uint32_t crc_read = header_p->crc;
    const uint32_t crc_calculated = CalculatePickleCRC(pickle);

    if (crc_read != crc_calculated) {
        LOG(WARNING) << "Invalid CRC in Simple Index file.";
        return;
    }

    SimpleIndexFile::IndexMetadata index_metadata;
    if (!index_metadata.Deserialize(&pickle_it)) {
        LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
        return;
    }

    if (!index_metadata.CheckIndexMetadata()) {
        LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
        return;
    }

    entries->reserve(index_metadata.entry_count() + kExtraSizeForMerge);
    while (entries->size() < index_metadata.entry_count()) {
        uint64_t hash_key;
        EntryMetadata entry_metadata;
        if (!pickle_it.ReadUInt64(&hash_key) || !entry_metadata.Deserialize(&pickle_it)) {
            LOG(WARNING) << "Invalid EntryMetadata in Simple Index file.";
            entries->clear();
            return;
        }
        SimpleIndex::InsertInEntrySet(hash_key, entry_metadata, entries);
    }

    int64_t cache_last_modified;
    if (!pickle_it.ReadInt64(&cache_last_modified)) {
        entries->clear();
        return;
    }
    DCHECK(out_cache_last_modified);
    *out_cache_last_modified = base::Time::FromInternalValue(cache_last_modified);

    out_result->index_write_reason = index_metadata.reason();
    out_result->did_load = true;
}

// static
void SimpleIndexFile::SyncRestoreFromDisk(
    const base::FilePath& cache_directory,
    const base::FilePath& index_file_path,
    SimpleIndexLoadResult* out_result)
{
    VLOG(1) << "Simple Cache Index is being restored from disk.";
    simple_util::SimpleCacheDeleteFile(index_file_path);
    out_result->Reset();
    SimpleIndex::EntrySet* entries = &out_result->entries;

    const bool did_succeed = TraverseCacheDirectory(
        cache_directory, base::Bind(&ProcessEntryFile, entries));
    if (!did_succeed) {
        LOG(ERROR) << "Could not reconstruct index from disk";
        return;
    }
    out_result->did_load = true;
    // When we restore from disk we write the merged index file to disk right
    // away, this might save us from having to restore again next time.
    out_result->flush_required = true;
}

// static
bool SimpleIndexFile::LegacyIsIndexFileStale(
    base::Time cache_last_modified,
    const base::FilePath& index_file_path)
{
    base::Time index_mtime;
    if (!simple_util::GetMTime(index_file_path, &index_mtime))
        return true;
    return index_mtime < cache_last_modified;
}

} // namespace disk_cache
